# Interactive explorer for fitness_data.csv: one lines+markers trace per sensor
# signal plotted against the timestamp, a dropdown that selects the subject
# (a plotly filter transform on subID) and a dropdown that selects which
# signal is visible.
data <- fread("fitness_data.csv")
d <- na.omit(data) # drop every row that contains a missing value
sub_IDs <- sort(unique(d$subID)) # all subject ids, ascending

# Signals in trace order. `col` is handed to d[[col]]; `label` is used both as
# the legend entry and as the entry in the signal dropdown. The magnetometer
# columns are addressed by position (12 to 14) rather than by name.
signals <- list(
  list(col = "Body Temperature (Celsius)",
       label = "Body Temperature (Celsius)"),
  list(col = "heartrate during activity (bpm)",
       label = "heartrate during activity (bpm)"),
  list(col = "Acceleration (x axis) in m/s2",
       label = "Acceleration (x axis) in m/s2"),
  list(col = "Acceleration (y axis) in m/s2",
       label = "Acceleration (y axis) in m/s2"),
  list(col = "Acceleration (z axis) in m/s2",
       label = "Acceleration (z axis) in m/s2"),
  list(col = "Gyroscope (x axis) in rad/s",
       label = "Gyroscope (x axis) in rad/s"),
  list(col = "Gyroscope (y axis) in rad/s",
       label = "Gyroscope (y axis) in rad/s"),
  list(col = "Gyroscope (z axis) in rad/s",
       label = "Gyroscope (z axis) in rad/s"),
  list(col = 12L, label = "Magnetometer (x axis) in μT"),
  list(col = 13L, label = "Magnetometer (y axis) in μT"),
  list(col = 14L, label = "Magnetometer (z axis) in μT")
)

# Base plot: shared x axis plus the subject filter that every trace inherits.
graph <- plot_ly(
  d,
  x = ~`timestamp (in seconds)`,
  transforms = list(
    list(
      type = "filter",
      target = ~subID,
      operation = "=",
      value = sub_IDs[1]
    )
  )
)

# One trace per signal; only the first one starts visible. The trace type is
# given explicitly so plotly does not have to guess it for every trace.
for (i in seq_along(signals)) {
  graph <- add_trace(
    graph,
    y = d[[signals[[i]]$col]],
    name = signals[[i]]$label,
    type = "scatter",
    mode = "lines+markers",
    visible = i == 1L
  )
}

# Subject dropdown: one button per subject id actually present in the data;
# each button rewrites the value of the filter transform.
subject_buttons <- lapply(seq_along(sub_IDs), function(i) {
  list(
    method = "restyle",
    args = list("transforms[0].value", sub_IDs[i]),
    label = sub_IDs[i]
  )
})

# Signal dropdown: button i shows trace i and hides all the others.
signal_buttons <- lapply(seq_along(signals), function(i) {
  list(
    method = "restyle",
    args = list("visible", as.list(seq_along(signals) == i)),
    label = signals[[i]]$label
  )
})

graph <- layout(
  graph,
  title = "Fitness data",
  xaxis = list(
    title = "x(timestamp in seconds)",
    rangeselector = list(
      buttons = list(
        list(count = 3, stepmode = "backward"),
        list(step = "all")
      )
    ),
    rangeslider = list(type = "time")
  ),
  yaxis = list(title = "y"),
  updatemenus = list(
    list(
      type = "dropdown",
      active = 0,
      x = 1.3, y = 0.9,
      buttons = subject_buttons
    ),
    list(
      x = 1.5, y = 0.75,
      buttons = signal_buttons
    )
  )
)
graph
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
Insights gleaned from the plot: 1. For most of the subjects, body temperature increases and then decreases with time. 2. The variation in all the other parameters is random.
What is undersampling and oversampling? Consider the dataset subject.csv. Is there a case of undersampling or oversampling? If so, mention a technique to remedy the problem. Justify your answer.
# Source: https://zyxo.wordpress.com/2008/12/30/oversampling-or-undersampling/
# Load the subject metadata and show the full table.
subject_df <- read.csv(file = "subject.csv")
print(x = subject_df)
## Subject.ID Sex Age..years. Height..cm. Weight..kg. Resting.HR.BPM.
## 1 101 Male 27 182 83 70
## 2 102 Female 25 169 75 69
## 3 103 Male 31 187 78 60
## 4 104 Male 24 194 74 86
## 5 105 Male 26 180 92 60
## 6 106 Male 26 183 68 87
## 7 107 Male 23 173 95 66
## 8 108 Male 32 179 58 65
## 9 109 Male 31 168 73 54
## Max.HR..bpm. Dominant.hand
## 1 193 right
## 2 195 right
## 3 189 right
## 4 196 right
## 5 194 right
## 6 194 right
## 7 197 right
## 8 188 left
## 9 189 right
# Check subject.csv for undersampling / oversampling by counting the subjects
# of each sex. The counts show that the data is highly imbalanced.
subject_aggregate_df <- aggregate(
  x = subject_df$Dominant.hand,
  by = list(Sex = subject_df$Sex),
  FUN = length
)
barplot(
  height = subject_aggregate_df$x,
  names.arg = subject_aggregate_df$Sex,
  ylab = "Population"
)
# Remedy: either oversample the female population or undersample the males.
# Oversampling the females seems to be the better approach.
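Answer 2: The data is imbalanced, so the remedy is either to oversample the female population or to undersample the males. Oversampling the females is the better approach because, with only nine subjects, undersampling the males would discard most of the data.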
There are various techniques for sampling data. Suggest a sampling technique that you think is ideal for the data in fitness_data.csv, and justify your choice.
# Load the activity table and the sensor recordings.
activities_df <- read.csv(file = "activities.csv")
fitness_df <- read.csv(file = "fitness_data.csv")
# In these datasets the only non-medical field recorded is the activity ID.
# A total of 17 activities are performed, so the cluster sampling technique
# can be used to sample the subjects.
Answer 3: In these datasets, the only non-medical field recorded is the activity ID. A total of 17 activities are performed, so the cluster sampling technique can be used to sample the subjects.
In August 2018, the Election Commission of India made the Lok Sabha 2014 data (Lok Sabha-2014 data.csv) public so that analysts could use it for the 2019 Lok Sabha election. Provide a suitable visualisation that accounts for the distribution of votes across the country.
# Register the Google Maps API key and download the base map of India.
# An internet connection is required to run this.
# The key is read from the GGMAP_GOOGLE_API_KEY environment variable (for
# example set in ~/.Renviron) so that the credential never lives in the script.
google_api_key <- Sys.getenv("GGMAP_GOOGLE_API_KEY")
if (!nzchar(google_api_key)) {
  stop(
    "Set the GGMAP_GOOGLE_API_KEY environment variable to a Google Maps ",
    "API key before running this script.",
    call. = FALSE
  )
}
register_google(key = google_api_key)
map <- get_map(location = "India", zoom = 5)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=India&zoom=5&size=640x640&scale=2&maptype=terrain&language=en-EN&key=xxx-nYr9b5zNs2FW8jbGluJe0
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=India&key=xxx-nYr9b5zNs2FW8jbGluJe0
# Load the 2014 Lok Sabha results.
election_df <- read.csv(file = "Lok Sabha-2014 data.csv")
# print(election_df)
# Q4: Shiny app
# Visualises where each party won and by how much, which shows how prevalent
# or successful a party was in a given region. Every point on the map marks a
# row of election_df for the selected party at its longitude/latitude; the
# point's area is scaled to the MARGIN column (scale_size_area).
# The party is chosen from a drop-down menu.
ui <- fluidPage(
  titlePanel("Election Results"),
  sidebarLayout(
    # Input selection
    sidebarPanel(
      selectInput(
        "party",
        "Choose your input:",
        choices = unique(election_df$PARTY)
      )
    ),
    # Map for the selected party
    mainPanel(
      plotOutput("whichplot")
    )
  )
)

server <- function(input, output) {
  output$whichplot <- renderPlot({
    # Wait until a party has actually been selected.
    req(input$party)
    # %in% never yields NA, so rows with a missing PARTY are dropped.
    party_df <- election_df[election_df$PARTY %in% input$party, , drop = FALSE]
    ggmap(map) +
      geom_point(
        aes(x = longitude, y = latitude, size = MARGIN),
        data = party_df,
        alpha = 0.4,
        color = "orange"
      ) +
      scale_size_area(name = "MARGIN WIN")
  })
}

shinyApp(ui, server)
Many good Bollywood movies were released in 2019, one of them being Kabir Singh. The file tweets.txt contains what people have tweeted about this movie. Provide a suitable visualization that depicts the general sentiment of the audience.
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
##
## smiths
library(wordcloud)
library(reshape2)
library(dplyr)
library(tidytext)
# Read the tweets (one per line) and split them into one lower-cased word per
# row, then list the most frequent words.
filePath <- "tweets.txt"
text <- readLines(filePath)
# NOTE(review): if the top tokens come out as stray accented letters such as
# "â", the file encoding probably does not match the session's; consider
# readLines(filePath, encoding = "UTF-8") - confirm against the file.

# Number the lines from the data itself instead of a hard-coded line count, so
# the script keeps working when tweets.txt changes length.
text_df <- tibble(line = seq_along(text), text = text)
text_df <- text_df %>%
  unnest_tokens(word, text)
text_df %>%
  count(word, sort = TRUE)
## # A tibble: 19,761 x 2
## word n
## <chr> <int>
## 1 â 15272
## 2 kabirsingh 7666
## 3 ã 7639
## 4 the 5506
## 5 a 4659
## 6 shahidkapoor 4188
## 7 kabir 4116
## 8 is 4074
## 9 singh 3862
## 10 of 3710
## # … with 19,751 more rows
# Words that the NRC lexicon tags with the "joy" sentiment.
nrc_joy <- filter(get_sentiments("nrc"), sentiment == "joy")
# Frequency of those joy words among the tweet tokens, most common first.
count(inner_join(text_df, nrc_joy), word, sort = TRUE)
## Joining, by = "word"
## # A tibble: 291 x 2
## word n
## <chr> <int>
## 1 love 912
## 2 good 529
## 3 brilliant 206
## 4 happy 197
## 5 outstanding 137
## 6 beautiful 123
## 7 finally 114
## 8 star 111
## 9 success 109
## 10 music 105
## # … with 281 more rows
# Comparison word cloud of the tweet words found in the Bing lexicon, split by
# the lexicon's sentiment label.
par(mar = c(0, 1.5, 0.5, 0.5), mgp = c(10, 1, 0))
local({
  # Keep only the tokens that appear in the Bing lexicon.
  bing_hits <- inner_join(text_df, get_sentiments("bing"))
  tally <- count(bing_hits, word, sentiment, sort = TRUE)
  # Words as rows, one column per sentiment, 0 where a word lacks a sentiment.
  term_matrix <- acast(tally, word ~ sentiment, value.var = "n", fill = 0)
  # Colours are assigned to the matrix columns in order.
  # NOTE(review): presumably red = negative, blue = positive - confirm.
  comparison.cloud(term_matrix, colors = c("red", "blue"), max.words = 100)
})
## Joining, by = "word"